In [3]:
!pip install folium
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime
import datetime
import statsmodels.formula.api as sm
Requirement already satisfied: folium in /opt/conda/lib/python3.7/site-packages (0.10.1)
Requirement already satisfied: branca>=0.3.0 in /opt/conda/lib/python3.7/site-packages (from folium) (0.3.1)
Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from folium) (1.17.0)
Requirement already satisfied: jinja2>=2.9 in /opt/conda/lib/python3.7/site-packages (from folium) (2.10.1)
Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from folium) (2.22.0)
Requirement already satisfied: six in /opt/conda/lib/python3.7/site-packages (from branca>=0.3.0->folium) (1.12.0)
Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.7/site-packages (from jinja2>=2.9->folium) (1.1.1)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (3.0.4)
Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (2.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (1.25.3)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->folium) (2019.6.16)
In [4]:
original = pd.read_excel('Traffic_big.xlsx')
In [5]:
# Converting the Latitude and Longitude Attributes to a Float
original["Latitude"] = original["Latitude"].astype(float)
original["Longitude"] = original["Longitude"].astype(float)

# Combining The Datetime and TimeStamp together to form Datetime Objects
dt = []
for index, row in original.iterrows():
    time = datetime.datetime.combine(row['Date Of Stop'], row['Time Of Stop'])
    dt.append(time)

original['datetime'] = pd.to_datetime(dt)

original = original[original["Gender"] != "U"]
original = original[(original["Year"] != 0) & (original["Year"] < 2020) & (original["Year"] > 1900)]
In [6]:
filtered_cols = ["Date Of Stop", "Time Of Stop", "datetime", "SubAgency",
                 "Description", "Location", "Latitude", "Longitude",
                 "Accident", "Belts", "Personal Injury", "Property Damage",
                 "Fatal", "Alcohol", "Work Zone", "VehicleType",
                 "Year", "Make", "Violation Type", "Contributed To Accident",
                 "Race", "Gender", "DL State"]

# Can break up the criteria above to make the dataframe more tidy
fil_df = original[filtered_cols]
In [7]:
# Make a sample (of size 5000) of the Traffic Violation dataset
sam = fil_df.sample(n = 5000)
In [8]:
def generate_map(loc = [39.1247, -77.1905], zoom = 10.5, tile = "openstreetmap"):
    res_map = folium.Map(location = loc, zoom_start = zoom, control_scale = True, tiles = tile)

    # Add the Tile (or Style) of the Map
    folium.TileLayer('openstreetmap').add_to(res_map)
    folium.TileLayer('Stamen Watercolor').add_to(res_map)
    folium.TileLayer('Stamen Toner').add_to(res_map)
    return res_map

In [9]:
# This Function returns the designated color assigned to a race.
def color_select(race):
    ethnicity = {'ASIAN': "#ed8134", # Orange
                 'BLACK': "#391cba", #Indigo
                 'HISPANIC': "#119992", #Teal 
                 'NATIVE AMERICAN': "#9412b8", # Violet 
                 'OTHER': "#127bb8", # Blue
                 'WHITE': "#e81c1c"} # Red

    return ethnicity[race]

Map Exploring the Race and Gender

In [24]:
# Creating an Empty Map
map_total = generate_map()

# other mapping code (e.g. lines, markers etc.)
asian_fg = folium.FeatureGroup(name = "Asian")
black_fg = folium.FeatureGroup(name = "Black")
his_fg = folium.FeatureGroup(name = "Hispanic")
na_fg = folium.FeatureGroup(name = "Native American")
other_fg = folium.FeatureGroup(name = "Other")
white_fg = folium.FeatureGroup(name = "White")

race = {'ASIAN': asian_fg,
        'BLACK': black_fg,
        'HISPANIC': his_fg,
        'NATIVE AMERICAN': na_fg,
        'OTHER': other_fg,
        'WHITE': white_fg}

for ind, row in sam.iterrows():

    entry = (folium.RegularPolygonMarker(location = [row["Latitude"],row["Longitude"]], popup = row["Description"],
                                        color= color_select(row["Race"]), fill = True, weight = 1,
                                        number_of_sides = 3 if row["Gender"] == "M" else 6,
                                        radius = 4, opactity = .4))
    entry.add_to(race[row["Race"]])

for r in race:
    race[r].add_to(map_total)

folium.LayerControl().add_to(map_total)

map_total
Out[24]:

Bar Graph Relating Gender, Race, and the number of Traffic Violation

In [11]:
gr_df = sam.copy()
gr_df["count"] = 1

aggregation_functions = {'count': 'sum'}
nd = gr_df.groupby(['Gender', 'Race']).aggregate(aggregation_functions)

# Setting up the plot and dimension
fig, axs = plt.subplots()
fig.set_figheight(30)
fig.set_figwidth(40)

b1 = sns.barplot(x="Gender", y ="count", hue="Race", palette = "Spectral", data=nd.reset_index(), ax = axs)
b1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
          fancybox=True, shadow=True, ncol=3, labelspacing=2, fontsize = 20)

b1.set_title("The Occurrence of Traffic Violation Based on Gender and Race", fontsize = 40)
b1.set_ylabel("Count", fontsize = 30)
b1.set_xlabel("Gender", fontsize = 30)
b1.tick_params(axis='both', labelsize=25)

plt.show()

HeatMap Exploring the Occurrence based on Time (Hours)

In [12]:
sam["hour"] = [t.hour for t in sam["Time Of Stop"]]
cut = pd.cut(sam["hour"], bins = [0,2,4,6,8,10,12,14,16,18,20,22,24],
             labels = [1,2,3,4,5,6,7,8,9,10,11,12], right = False)
sam["cut"] = cut
In [13]:
df_copy = sam.copy()
df_copy['count'] = 1
hr_map = generate_map()

hm_fg = []
hr = 0
for ind in range(12):
    temp_name = "Hours " + str(hr) + " to " + str(hr + 1)
    hm_fg.append(folium.FeatureGroup(name = temp_name, show= True if ind == 0 else False))
    hr += 2


# Group time together to have more during a specifc set of hours\
for index in range(12):
    temp = df_copy[df_copy["cut"] == index + 1]
    HeatMap(data=temp[['Latitude', 'Longitude', 'count']]
                .groupby(['Latitude', 'Longitude', 'count'])
                .sum()
                .reset_index()
                .values.tolist(),
                radius=8, max_zoom=13).add_to(hm_fg[index])

for fg in hm_fg:
    fg.add_to(hr_map)


folium.LayerControl().add_to(hr_map)

hr_map
Out[13]:

HeatMap Over Time

In [14]:
time_map = generate_map()
df_hour_list = []
for hour in df_copy["cut"].sort_values().unique():
    df_hour_list.append(df_copy.loc[df_copy.hour == hour, ['Latitude', 'Longitude', 'count']]
                        .groupby(['Latitude', 'Longitude']).sum().reset_index().values.tolist())

HeatMapWithTime(df_hour_list, radius=8, gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'},
                min_opacity=0.5, max_opacity=0.8, use_local_extrema=True, auto_play=True).add_to(time_map)

folium.LayerControl().add_to(time_map)

time_map
Out[14]:

Total HeatMap

In [15]:
df_copy = sam.copy()
df_copy['count'] = 1
base_map = generate_map()

HeatMap(data=df_copy[['Latitude', 'Longitude', 'count']]
            .groupby(['Latitude', 'Longitude', 'count'])
            .sum()
            .reset_index()
            .values.tolist(),
            radius=8, max_zoom=13).add_to(base_map)

folium.LayerControl().add_to(base_map)

base_map
Out[15]:

Exploring the Vehicle Type and Year

In [16]:
##### VehicleType, Year, Make
vy_df = sam.copy()
vy_df['count'] = 1
vy_df = vy_df.sort_values("Year")

aggregation_functions = {'count': 'sum'}
nd2 = vy_df.groupby(['Year', 'VehicleType']).aggregate(aggregation_functions)


# Setting up the plot and dimension
fig, axs = plt.subplots()
fig.set_figheight(30)
fig.set_figwidth(40)

l = sns.lineplot(x = "Year", y = "count", hue = "VehicleType",data = nd2.reset_index(), ax = axs)
l.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),
          fancybox=True, shadow=True, ncol=10, labelspacing=2, fontsize = 18)

l.set_title("The Occurrence of Traffic Violation Based on the Vehicle Type and Year", fontsize = 40)
l.set_ylabel("Occurrence", fontsize = 30)
l.set_xlabel("Year", fontsize = 30)
l.tick_params(axis='both', labelsize=25)

plt.show()

Linear Regression

In [17]:
district = np.unique(original["SubAgency"])
simp_district = {}

for dist in district:
    if dist[0] == "H":
        simp_district[dist] = 7
    else:
        simp_district[dist] = int(dist[0])
In [18]:
race_num = {'ASIAN': 0,
            'BLACK': 1,
            'HISPANIC': 2,
            'NATIVE AMERICAN': 3,
            'OTHER': 4,
            'WHITE': 5}
In [19]:
vt = {"Warning": 1,
      "Citation": 2,
      "ESERO": 3,
      "SERO": 4}
In [20]:
data_reg = sam.copy()

data_reg["district_num"] = [simp_district[d] for d in data_reg["SubAgency"]]
data_reg["gender_num"] = [0 if g == "M" else 1 for g in data_reg["Gender"] ]
data_reg["race_num"] = [race_num[r] for r in data_reg["Race"]]
data_reg["violation_type_num"] = [vt[v] for v in data_reg["Violation Type"]]

data_reg
Out[20]:
Date Of Stop Time Of Stop datetime SubAgency Description Location Latitude Longitude Accident Belts ... Contributed To Accident Race Gender DL State hour cut district_num gender_num race_num violation_type_num
77534 2017-07-05 06:34:00 2017-07-05 06:34:00 4th district, Wheaton FAILURE OF INDIVIDUAL DRIVING ON HIGHWAY TO DI... RANDOLPH RD @ BLUHILL RD 39.059008 -77.065487 No No ... No HISPANIC M MD 6 4 4 0 2 2
7786 2016-02-29 13:15:00 2016-02-29 13:15:00 5th district, Germantown DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... STRINGTOWN RD @ SNOWDEN FARM PKWY 39.244095 -77.263673 No No ... No HISPANIC F MD 13 7 5 1 2 1
63991 2015-02-11 20:26:00 2015-02-11 20:26:00 5th district, Germantown DISPLAYING REG. PLATE ISSUED FOR OTHER VEHICLE MIDDLEBROOK @ CROSS RIDGE 39.174925 -77.259038 No No ... No BLACK F MD 20 11 5 1 1 2
57928 2017-03-08 22:45:00 2017-03-08 22:45:00 3rd district, Silver Spring PERSON DRIVING MOTOR VEHICLE ON HIGHWAY OR PUB... FENTON ST @ SILVER SPRING AVE 38.997718 -77.024608 No No ... No BLACK M MD 22 12 3 0 1 2
52207 2017-02-06 01:25:00 2017-02-06 01:25:00 2nd district, Bethesda DRIVING WHILE IMPAIRED BY ALCOHOL OLD GEORGETOWN RD @ DEMOCRACY BLVD 38.983335 -77.094818 No No ... No BLACK M VA 1 1 2 0 1 2
37457 2015-12-22 23:59:00 2015-12-22 23:59:00 Headquarters and Special Operations DRIVER FAIL TO STOP AT RED TRAFFIC SIGNAL BEFO... MUDDY BRANCH RD @ W DIAMOND AVE 39.140260 -77.202802 No No ... No WHITE M MD 23 12 7 0 5 2
45379 2016-05-17 09:44:00 2016-05-17 09:44:00 6th district, Gaithersburg / Montgomery Village DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... LONGDRAFT @ GREAT SENECA HWY FROM 124 39.132465 -77.244695 No No ... No WHITE M MD 9 5 6 0 5 2
33330 2018-04-04 00:01:00 2018-04-04 00:01:00 4th district, Wheaton DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... UNIVERSITY BLV/SLIGO CREEK PKWY 39.036582 -77.031557 No No ... No ASIAN M MD 0 1 4 0 0 1
20206 2015-10-07 09:47:00 2015-10-07 09:47:00 5th district, Germantown EXCEEDING THE POSTED SPEED LIMIT OF 30 MPH 109/ COMUS 39.246590 -77.349740 No No ... No WHITE M CA 9 5 5 0 5 1
64331 2013-02-08 15:44:00 2013-02-08 15:44:00 1st district, Rockville MOTOR VEH. W/O REQUIRED STOP LAMPS EQUIPMENT MONTROSE PKWY / EAST JEFFERSON ST 39.050912 -77.125385 No No ... No WHITE M MD 15 8 1 0 5 1
63835 2012-04-03 14:00:00 2012-04-03 14:00:00 4th district, Wheaton KNOWINGLY DRIVING UNINSURED VEHICLE GEORGIA AVE AT VERONA DR 39.075432 -77.067923 No No ... No WHITE M MD 14 8 4 0 5 2
25620 2013-04-03 13:30:00 2013-04-03 13:30:00 4th district, Wheaton FAILURE TO ATTACH VEHICLE REGISTRATION PLATES ... GEORGIA AVE/ HINES 39.142677 -77.068165 No Yes ... No WHITE F MD 13 7 4 1 5 2
67186 2012-10-08 09:46:00 2012-10-08 09:46:00 4th district, Wheaton EXCEEDING MAXIMUM SPEED: 39 MPH IN A POSTED 30... E/B 2 BRIGHTON DAM RD (W.S.S.C REC AREA) 39.191534 -77.006783 No No ... No WHITE M MD 9 5 4 0 5 2
95248 2017-08-28 10:08:00 2017-08-28 10:08:00 1st district, Rockville EXCEEDING POSTED MAXIMUM SPEED LIMIT: 49 MPH I... 1100 BLOCK OF NB ROCKVILLE PIKE 39.073080 -77.133983 No No ... No WHITE M MD 10 6 1 0 5 2
89536 2016-03-24 20:13:00 2016-03-24 20:13:00 6th district, Gaithersburg / Montgomery Village FAILURE OF INDIVIDUAL DRIVING ON HIGHWAY TO DI... MVA / MID COUNTY HWY 39.155663 -77.208073 No No ... No HISPANIC M MD 20 11 6 0 2 1
3996 2013-11-21 22:53:00 2013-11-21 22:53:00 3rd district, Silver Spring FAILURE OF INDIVIDUAL DRIVING ON HIGHWAY TO DI... MIDDLEBROOK/270 39.178788 -77.240792 No No ... No BLACK M MD 22 12 3 0 1 2
69873 2013-10-07 09:13:00 2013-10-07 09:13:00 6th district, Gaithersburg / Montgomery Village FAILURE TO DISPLAY REGISTRATION CARD UPON DEMA... MONTGOMERY VILLAGE/CHRISTOPHER AVE 39.161420 -77.202962 No No ... No WHITE F MD 9 5 6 1 5 1
27671 2015-01-29 13:17:00 2015-01-29 13:17:00 4th district, Wheaton OPERATOR NOT RESTRAINED BY SEATBELT WB RT 28 AT FIRESTONE DRIVE 39.121502 -77.009222 No No ... No HISPANIC M MD 13 7 4 0 2 2
85297 2013-11-03 01:35:00 2013-11-03 01:35:00 4th district, Wheaton DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... GRANDVIEW AVE @ ENNALLS AVE 39.039041 -77.051909 No No ... No HISPANIC M VA 1 1 4 0 2 1
98290 2013-12-06 01:14:00 2013-12-06 01:14:00 2nd district, Bethesda DRIVER ENTERING INTERSECTION AT FLASHING RED T... SUMMIT AVE/KNOWLES AVE 39.027672 -77.078038 No No ... No ASIAN M MD 1 1 2 0 0 1
19032 2017-12-10 21:21:00 2017-12-10 21:21:00 5th district, Germantown PARK IN DESIG INDIV W/DISIBILITIES SPACE W/O S... 13633 CONNECTICUT AVE 39.078052 -77.079855 No No ... No WHITE M MD 21 11 5 0 5 1
11146 2015-12-05 18:45:00 2015-12-05 18:45:00 4th district, Wheaton DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... GEORGIA AVE AND VEIRS MILL RD 39.034160 -77.049485 No No ... No OTHER M MD 18 10 4 0 4 1
92098 2018-01-24 10:56:00 2018-01-24 10:56:00 4th district, Wheaton FAILURE TO ATTACH VEHICLE REGISTRATION PLATES ... UNIVERSITY BLVD W/AMHERST AVE 39.041038 -77.046528 No No ... No BLACK M MD 10 6 4 0 1 1
6676 2012-10-24 21:29:00 2012-10-24 21:29:00 3rd district, Silver Spring DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... UNIVERSITY BLVD E/ GLENVILLE RD 39.003377 -76.996531 No No ... No HISPANIC F MD 21 11 3 1 2 1
13387 2016-03-23 22:22:00 2016-03-23 22:22:00 3rd district, Silver Spring OPER. MOTOR VEH. WITH OPERATOR NOT RESTRAINED ... E/B 200 @ NEW HAMPSHIRE AVE 39.086895 -77.013815 No No ... No BLACK M PA 22 12 3 0 1 1
58104 2013-05-24 15:08:00 2013-05-24 15:08:00 4th district, Wheaton UNAUTHORIZED DISPLAY AND USE OF REGISTRATION P... 12021 GEORGIA AVENUE 39.052838 -77.050139 No No ... No BLACK F MD 15 8 4 1 1 2
69522 2014-03-14 13:47:00 2014-03-14 13:47:00 4th district, Wheaton DRIVER FAILURE TO STOP AT STOP SIGN LINE RANDOLPH RD / COLIE DR 39.058342 -77.083792 No No ... No HISPANIC M MD 13 7 4 0 2 2
67217 2012-10-04 09:51:00 2012-10-04 09:51:00 4th district, Wheaton DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... GOOD HOPE RD AND NEW HAMPSHIRE AVE 39.097554 -77.002692 No No ... No BLACK F MD 9 5 4 1 1 2
64553 2017-10-16 21:56:00 2017-10-16 21:56:00 4th district, Wheaton DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... S/B GEORGIA AVE. @ LAYHILL RD. 39.058700 -77.050283 No No ... No HISPANIC F MD 21 11 4 1 2 1
61803 2013-05-13 21:11:00 2013-05-13 21:11:00 3rd district, Silver Spring FAILING TO SECURE CHILD UNDER AGE 8 IN CHILD S... UNIVERSITY BLVD W AT GEORGIA AVE 39.041394 -77.052540 No No ... No HISPANIC M MD 21 11 3 0 2 2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23091 2017-03-04 23:17:00 2017-03-04 23:17:00 4th district, Wheaton DRIVER CHANGING LANES WHEN UNSAFE N/B GEORGIA AVE. @ UNIVERSITY BLVD. W. 39.058273 -77.048253 No No ... No BLACK M DC 23 12 4 0 1 2
37897 2014-06-29 02:14:00 2014-06-29 02:14:00 2nd district, Bethesda PERSON DISPLAYING LIC. NOT ISSUED TO HIM CONNECTICUT AVE AT KNOWLES AVE 39.028450 -77.076655 No No ... No WHITE M MD 2 2 2 0 5 2
46147 2016-05-26 09:53:00 2016-05-26 09:53:00 5th district, Germantown EXCEEDING THE POSTED SPEED LIMIT OF 40 MPH RT. 118 @ BOWMAN MILL DR 39.175130 -77.272137 No No ... No BLACK M MD 9 5 5 0 1 1
77004 2017-01-09 11:33:00 2017-01-09 11:33:00 3rd district, Silver Spring EXCEEDING MAXIMUM SPEED: 54 MPH IN A POSTED 35... BRIGGS CHANEY RD / GATESHEAD MANOR WAY 39.076047 -76.940727 No No ... No BLACK M MD 11 6 3 0 1 2
90161 2015-06-21 02:13:00 2015-06-21 02:13:00 2nd district, Bethesda DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR... WISCONSIN AVE @ BATTERY LA 38.995370 -77.096357 No No ... No ASIAN F MD 2 2 2 1 0 2
38637 2014-11-09 03:01:00 2014-11-09 03:01:00 2nd district, Bethesda DRIVING VEHICLE IN VIOLATION OF RESTRICTED LIC... CONNECTICUT AVE @ ROSEMARY ST 38.986408 -77.077017 No No ... No WHITE M MD 3 2 2 0 5 2
88070 2013-03-22 08:00:00 2013-03-22 08:00:00 4th district, Wheaton DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... FRANKFORT DR / ASPEN HILL ROAD 39.079955 -77.082625 No No ... No BLACK F MD 8 5 4 1 1 2
63739 2013-01-22 23:30:00 2013-01-22 23:30:00 3rd district, Silver Spring DRIVER FAILURE TO AVOID PROJECTING GLARING LIG... 8100 BLK GEORGIA AVE 38.991334 -77.026640 No No ... No BLACK M MD 23 12 3 0 1 2
91593 2014-05-08 23:01:00 2014-05-08 23:01:00 6th district, Gaithersburg / Montgomery Village EXCEEDING THE POSTED SPEED LIMIT OF 55 MPH SB 270 AT SAM EIG HIGHWAY 39.125135 -77.201460 No No ... No BLACK M MD 23 12 6 0 1 1
74924 2018-03-06 07:20:00 2018-03-06 07:20:00 2nd district, Bethesda DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... FREDERICK AVENUE/KENT STREET 39.021915 -77.069448 No No ... No WHITE M MD 7 4 2 0 5 1
72798 2016-09-02 16:58:00 2016-09-02 16:58:00 4th district, Wheaton FAILURE TO ATTACH VEHICLE REGISTRATION PLATES ... ASPEN HILL RD @ ORIENTAL ST 39.071207 -77.100577 No No ... No BLACK M MD 16 9 4 0 1 1
74529 2016-05-06 03:06:00 2016-05-06 03:06:00 4th district, Wheaton DRIVER FAILURE TO OBEY DESIGNATED LANE DIRECTIONS VEIRS MILL RD @ RANDOLPH RD 39.055598 -77.081807 No No ... No WHITE F MD 3 2 4 1 5 1
1264 2017-11-16 16:03:00 2017-11-16 16:03:00 4th district, Wheaton OPERATING VEHICLE ON HIGHWAY WITH UNAUTHORIZED... GRANDVIEW AVE/ BLUERIDGE AVE 39.042970 -77.052700 No No ... No HISPANIC M MD 16 9 4 0 2 1
66077 2015-03-28 15:53:00 2015-03-28 15:53:00 1st district, Rockville DRIVER FAILURE TO STOP AT STEADY CIRCULAR RED ... MD 28 / QUINCE ORCHARD RD 39.117638 -77.250928 No No ... No WHITE F MD 15 8 1 1 5 1
62314 2016-11-06 16:17:00 2016-11-06 16:17:00 2nd district, Bethesda DRIVER WHEN TURNING LEFT FAIL TO YIELD RIGHT O... CONNECTICUT AVE. @ PLYERSMILL RD. 39.029502 -77.074777 No No ... No WHITE F MD 16 9 2 1 5 2
18875 2016-08-24 22:55:00 2016-08-24 22:55:00 4th district, Wheaton FAILURE TO EQUIP VEH. WITH REQUIRED REAR STOP ... RANDOLPH RD / TIVOLI LAKE BLVD 39.066465 -77.031797 No No ... No BLACK F MD 22 12 4 1 1 1
81066 2012-05-18 11:50:00 2012-05-18 11:50:00 2nd district, Bethesda DISPLAY SUSPENDED LIC. 410/CHELTON RD 38.986067 -77.087794 No No ... No BLACK M MD 11 6 2 0 1 2
49576 2013-01-18 23:35:00 2013-01-18 23:35:00 3rd district, Silver Spring DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... BROADBIRCH DRIVE AT PLUM ORCHARD DRIVE 39.055048 -76.956035 No No ... No BLACK M MD 23 12 3 0 1 1
56950 2014-09-02 08:26:00 2014-09-02 08:26:00 2nd district, Bethesda DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... 270/ROCKLEDGE DR 39.033668 -77.140857 No No ... No HISPANIC M MD 8 5 2 0 2 1
9041 2015-04-28 01:24:00 2015-04-28 01:24:00 6th district, Gaithersburg / Montgomery Village EXCEEDING MAXIMUM SPEED: 63 MPH IN A POSTED 45... SHADY GROVE RD AND TUPELO DR 39.141567 -77.153003 No No ... No BLACK M MD 1 1 6 0 1 2
21437 2017-08-28 17:36:00 2017-08-28 17:36:00 4th district, Wheaton DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC... CONNECTICUT AVE @ DELANO ST 39.056747 -77.073437 No No ... No BLACK F MD 17 9 4 1 1 1
40002 2014-03-02 01:26:00 2014-03-02 01:26:00 3rd district, Silver Spring FAILURE TO DISPLAY REGISTRATION CARD UPON DEMA... 11TH AVE @ CARROLL AVE 38.996812 -76.990775 No No ... No WHITE M MD 1 1 3 0 5 2
40066 2017-03-05 18:39:00 2017-03-05 18:39:00 4th district, Wheaton PERSON DRIVING MOTOR VEHICLE ON HIGHWAY OR PUB... NB CONNECTICUT AVE @ ASPEN HILL RD 39.081792 -77.079777 No No ... No WHITE F MD 18 10 4 1 5 2
89645 2012-05-08 10:30:00 2012-05-08 10:30:00 6th district, Gaithersburg / Montgomery Village FAILURE VEH. TO YIELD INTERSECTION RIGHT-OF-WA... SHADY GROVE RD / COMPRINT CT 39.118976 -77.182696 No No ... No WHITE M MD 10 6 6 0 5 1
70814 2013-03-31 11:42:00 2013-03-31 11:42:00 6th district, Gaithersburg / Montgomery Village DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGI... MONTGOMERY VILLAGE AVE @ RUSSELL AVE 39.156308 -77.206313 No No ... No WHITE F MD 11 6 6 1 5 1
28991 2016-02-04 21:38:00 2016-02-04 21:38:00 2nd district, Bethesda DRIVING VEHICLE ON HIGHWAY WITHOUT CURRENT REG... DEMOCRACY BLVD/ OLD GEORGETOWN RD 39.023542 -77.128128 No No ... No ASIAN F MD 21 11 2 1 0 1
49952 2016-06-15 10:08:00 2016-06-15 10:08:00 1st district, Rockville DRIVER USING HANDS TO USE HANDHELD TELEPHONE W... DIAMONDBACK RD @ BENT TWIG LA 39.114998 -77.214355 No No ... No WHITE M MD 10 6 1 0 5 1
93675 2016-04-19 12:51:00 2016-04-19 12:51:00 4th district, Wheaton FAILURE TO STOP AT STOP SIGN GRANDVIEW AVE@ BLUERIDGE AVE 39.041602 -77.053187 No No ... No WHITE M MD 12 7 4 0 5 1
72664 2015-01-18 19:35:00 2015-01-18 19:35:00 1st district, Rockville EXCEEDING MAXIMUM SPEED: 44 MPH IN A POSTED 35... WOODFIELD RD / FAITH LN 39.298523 -77.199713 No No ... No WHITE M PA 19 10 1 0 5 2
28009 2018-04-05 15:00:00 2018-04-05 15:00:00 3rd district, Silver Spring FAIL OF DRIVER IN ATTENDED VEHICLE ACCIDENT TO... WB BRIGGS CHANEY RD AT OLD COLUMBIA PK 39.053315 -76.962432 No No ... No WHITE M MD 15 8 3 0 5 2

5000 rows × 29 columns

In [21]:
#mlr = sm.ols(formula = 'lifeExp ~ year + continent_Asia + continent_Africa + continent_Europe + continent_Americas + continent_Oceania + year*continent_Asia + year*continent_Africa + year*continent_Europe + year*continent_Americas + year*continent_Oceania', data = data).fit()   
distlr = sm.ols(formula = 'district_num ~ Year + gender_num + race_num + violation_type_num', data = data_reg).fit()

distlr.summary()
Out[21]:
OLS Regression Results
Dep. Variable: district_num R-squared: 0.009
Model: OLS Adj. R-squared: 0.008
Method: Least Squares F-statistic: 11.23
Date: Fri, 13 Dec 2019 Prob (F-statistic): 4.51e-09
Time: 16:39:58 Log-Likelihood: -9502.5
No. Observations: 5000 AIC: 1.902e+04
Df Residuals: 4995 BIC: 1.905e+04
Df Model: 4
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 38.2829 7.642 5.009 0.000 23.301 53.265
Year -0.0171 0.004 -4.493 0.000 -0.025 -0.010
gender_num -0.0587 0.049 -1.196 0.232 -0.155 0.038
race_num -0.0420 0.012 -3.367 0.001 -0.066 -0.018
violation_type_num -0.1448 0.046 -3.159 0.002 -0.235 -0.055
Omnibus: 336.547 Durbin-Watson: 2.017
Prob(Omnibus): 0.000 Jarque-Bera (JB): 138.130
Skew: 0.174 Prob(JB): 1.01e-30
Kurtosis: 2.264 Cond. No. 6.69e+05


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.69e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [ ]:

In [ ]: